This case study examines the annual production statistics published by the Department of Agriculture and Cooperation (years ending in March, 1993-2014). The data is provided in CSV format.
Acquire the data from the csv file and load it.
# Load the raw production statistics into `rawdata` and show its structure.
#
# The original chunk wrote `setwd = "<dir>"`, which only creates a character
# variable called `setwd`; it never changes the working directory, so the CSV
# was always read relative to the current directory. Instead of calling
# setwd() (a global side effect), resolve the file path explicitly.
data_dir <- "F:\\GLIMS-PGPBDA\\Assignments\\SVAP-Assignment\\Assignment-3"
data_file <- "Production-Department_of_Agriculture_and_Cooperation_1.csv"

# Keep the previous effective behaviour first (file in the current directory);
# only fall back to the assignment folder when it is not found there.
data_path <- data_file
if (!file.exists(data_path)) {
  data_path <- file.path(data_dir, data_file)
}

rawdata <- read.csv(data_path)
str(rawdata)
'data.frame': 429 obs. of 25 variables:
$ Particulars: Factor w/ 429 levels "(DC)Agricultural Coverage Under Irrigation",..: 8 109 119 121 140 141 226 106 107 108 ...
$ Frequency : Factor w/ 1 level "Annual, Ending mar Of Each Year": 1 1 1 1 1 1 1 1 1 1 ...
$ Unit : Factor w/ 7 levels "%","Bale mn",..: 5 5 5 5 5 5 5 5 5 5 ...
$ X3.1993 : int NA NA NA NA NA NA NA NA NA NA ...
$ X3.1994 : int NA NA NA NA NA NA NA NA NA NA ...
$ X3.1995 : int NA NA NA NA NA NA NA NA NA NA ...
$ X3.1996 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.1997 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.1998 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.1999 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.2000 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.2001 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.2002 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.2003 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.2004 : num NA NA NA NA NA NA NA NA NA NA ...
$ X3.2005 : num 198.4 103.3 95.1 83.1 72.2 ...
$ X3.2006 : num 208.6 109.9 98.7 91.8 78.3 ...
$ X3.2007 : num 217.3 110.6 106.7 93.4 80.2 ...
$ X3.2008 : num 230.8 121 109.8 96.7 82.7 ...
$ X3.2009 : num 234.5 118.1 116.3 99.2 84.9 ...
$ X3.2010 : num 218.1 104 114.2 89.1 75.9 ...
$ X3.2011 : num 244.5 120.9 123.6 96 80.7 ...
$ X3.2012 : num 259.3 131.3 128 105.3 92.8 ...
$ X3.2013 : num 257.1 128.1 129.1 105.2 92.4 ...
$ X3.2014 : num 264 129 135 106 92 ...
Keep only the rows related to area-wise food grain production.
library(tidyr)
# Keep the rows whose Particulars mention the food grain area series, except
# the "5 Yr" rows, which are excluded.
is_area_row <- grepl('Agricultural Production Foodgrains Area ', rawdata$Particulars)
is_five_year_row <- grepl('Agricultural Production Foodgrains Area 5 Yr', rawdata$Particulars)
rawdata <- rawdata[is_area_row & !is_five_year_row, ]

# Drop columns 2 through 14 by position (Frequency, Unit and the year columns
# X3.1993 to X3.2003 in the structure printed above).
rawdata <- rawdata[, -(2:14)]
str(rawdata)
'data.frame': 18 obs. of 12 variables:
$ Particulars: Factor w/ 429 levels "(DC)Agricultural Coverage Under Irrigation",..: 10 11 12 13 14 15 16 17 18 19 ...
$ X3.2004 : num 6.81 2.74 7.01 5.07 4.03 ...
$ X3.2005 : num 6.27 2.58 6.46 5.13 3.72 ...
$ X3.2006 : num 7.17 2.6 6.55 5.15 3.97 ...
$ X3.2007 : num 7.27 2.38 6.7 5.06 4.57 ...
$ X3.2008 : num 7.39 2.52 7.03 5.08 4.48 ...
$ X3.2009 : num 7.44 2.67 6.92 4.96 4.06 ...
$ X3.2010 : num 6.67 2.7 6.63 4.86 3.69 ...
$ X3.2011 : num 8.03 2.77 6.24 4.96 4.53 ...
$ X3.2012 : num 7.29 2.74 6.7 4.96 4.74 ...
$ X3.2013 : num 6.85 2.52 6.71 5.04 3.68 ...
$ X3.2014 : num NA NA NA NA NA NA NA NA NA NA ...
Apply the following transformations to the data: split the Particulars column into its words and store the State in a separate column.
library(splitstackshape)

# Split "Particulars" on spaces into Particulars_1, Particulars_2, ...
# Pieces 1-4 are the fixed prefix "Agricultural Production Foodgrains Area"
# selected by the earlier filter; pieces 5 and 6 hold the state name.
rawdata$Particulars <- as.character(rawdata$Particulars)
rawdata <- cSplit(rawdata, "Particulars", " ")

# Build State from pieces 5 and 6. Single-word names have NA in piece 6; the
# original paste() + sub("NA", "") left a trailing space on those names and
# would also have removed a literal "NA" occurring inside a real name.
# NOTE(review): names longer than two words would be truncated here and leave
# extra Particulars_7+ columns behind - confirm against the data.
state_first <- as.character(rawdata$Particulars_5)
state_second <- as.character(rawdata$Particulars_6)
rawdata$State <- ifelse(
  is.na(state_second),
  state_first,
  paste(state_first, state_second)
)

# Remove the helper pieces and X3.2014 (all NA in the structure printed above).
# Dropping by name with -c(...) relies on rawdata being a data.table here.
rawdata <- rawdata[, -c("Particulars_1", "Particulars_2", "Particulars_3",
                        "Particulars_4", "Particulars_5", "Particulars_6",
                        "X3.2014")]

# Row-wise total over 2004-2013. Reduce(`+`) adds the columns left to right,
# exactly like the original chained sum, so NA still propagates to the total.
year_cols <- paste0("X3.", 2004:2013)
rawdata$TotalStats <- Reduce(`+`, lapply(year_cols, function(col) rawdata[[col]]))
rawdata
Explore the data by creating visualisations.
library(ggplot2)
library(plotly)
library(knitr)
library(DT)
# One bar chart per year (plus the 2004-2013 total): bar height is the value
# of that column for each State, supplied through the `weight` aesthetic.
# Columns are referenced by bare name inside aes(); the original
# `rawdata$State` form is discouraged by ggplot2 and produces axis/legend
# titles such as "rawdata$State".
ggplot(rawdata) + aes(State, weight = X3.2004, color = State) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2005) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2006) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2007) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2008) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2009) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2010) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2011) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2012) + geom_bar()
ggplot(rawdata) + aes(State, weight = X3.2013) + geom_bar()
ggplot(rawdata) + aes(State, weight = TotalStats) + geom_bar()

# Interactive table, five rows per page. The argument is `options` (the
# original `option` only worked through partial matching) and the DataTables
# setting is the case-sensitive `pageLength`; `pagelength` was ignored.
datatable(rawdata, options = list(pageLength = 5))
library(ggplot2)
# Scatter of the 2004-2013 total per State, coloured by State. Bare column
# names are used inside aes() instead of `rawdata$...`, so the axis titles,
# legend and plotly tooltips read "State" / "TotalStats".
plot1 <- ggplot(rawdata) + aes(State, TotalStats, color = State) + geom_point()
library(plotly)
# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot1)
library(crosstalk)
library(d3scatter)

# Wrap the data in a SharedData object so the filters and both scatter plots
# operate on the same linked selection.
shared_rawdata <- SharedData$new(rawdata)

# Filter controls: a checkbox group on State and a range slider on TotalStats.
state_filter <- filter_checkbox(
  id = "State",
  label = "StateSelect",
  sharedData = shared_rawdata,
  group = ~State,
  inline = TRUE
)
total_filter <- filter_slider(
  id = "TotalStats",
  label = "TotalStats",
  sharedData = shared_rawdata,
  column = ~TotalStats,
  width = "100%"
)

# Linked scatter plots: TotalStats against the 2004 and 2005 values,
# coloured by State.
scatter_2004 <- d3scatter(shared_rawdata, ~TotalStats, ~X3.2004, ~State,
                          width = "100%", height = 300)
scatter_2005 <- d3scatter(shared_rawdata, ~TotalStats, ~X3.2005, ~State,
                          width = "100%", height = 300)

# Lay out the filter panel and the two plots side by side.
bscols(
  list(state_filter, total_filter),
  scatter_2004,
  scatter_2005
)